{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "dataA = np.mat([1,2,3,3,2,1])\n",
    "dataB = np.mat([1,2,3,8,3,2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.16139047779640892\n"
     ]
    }
   ],
   "source": [
    "# 欧氏距离 \n",
    "def EuclideanDistance(dataA,dataB):\n",
    "    # 归一化\n",
    "    # np.linalg.norm 用于范数计算，默认是二范数，相当于平方和开根号\n",
    "    return 1.0 / (1.0 + np.linalg.norm(dataA - dataB))\n",
    "print(EuclideanDistance(dataA,dataB))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.95564659]]\n"
     ]
    }
   ],
   "source": [
    "# 夹角余弦\n",
    "def Cosine(dataA,dataB):\n",
    "    sumData = dataA *dataB.T # 若列为向量则为 dataA.T * dataB\n",
    "    denom = np.linalg.norm(dataA) * np.linalg.norm(dataB)\n",
    "    # 归一化\n",
    "    return 0.5 + 0.5 * (sumData / denom)\n",
    "print(Cosine(dataA,dataB))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8601801351125985\n"
     ]
    }
   ],
   "source": [
    "# 皮尔逊相关系数\n",
    "def Pearson(dataA,dataB):\n",
    "    # 在当前物品没有人打分的时候（小于3）直接返回1.0相当于完全相似\n",
    "    # if len(np.nonzero(dataA)[0]) < 3 : return 1.0\n",
    "    # 皮尔逊相关系数的取值范围(-1 ~ 1),0.5 + 0.5 * result 归一化(0 ~ 1)\n",
    "    return 0.5 + 0.5 * np.corrcoef(dataA,dataB,rowvar = 0)[0][1]\n",
    "print(Pearson(dataA,dataB))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n"
     ]
    }
   ],
   "source": [
    "# 汉明距离\n",
    "def hammingDistance(dataA,dataB):\n",
    "    distanceArr = dataA - dataB\n",
    "    return np.sum(distanceArr == 0)# 若列为向量则为 shape[0]\n",
    "print(hammingDistance(dataA,dataB))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7\n"
     ]
    }
   ],
   "source": [
    "# 曼哈顿距离(Manhattan Distance)\n",
    "def Manhattan(dataA,dataB):\n",
    "    return np.sum(np.abs(dataA - dataB))\n",
    "print(Manhattan(dataA,dataB))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.86018014]]\n"
     ]
    }
   ],
   "source": [
    "# 余弦相似度与修正余弦相似度与皮尔逊相关系数的关系\n",
    "# Pearson 减去的是每个item i 的被打分的均值\n",
    "def Pearson(dataA,dataB):\n",
    "    avgA = np.mean(dataA)\n",
    "    avgB = np.mean(dataB)\n",
    "    sumData = (dataA - avgA) * (dataB - avgB).T # 若列为向量则为 dataA.T * dataB\n",
    "    denom = np.linalg.norm(dataA - avgA) * np.linalg.norm(dataB - avgB)\n",
    "    # 归一化\n",
    "    return 0.5 + 0.5 * (sumData / denom)\n",
    "print(Pearson(dataA,dataB))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.92640143]]\n"
     ]
    }
   ],
   "source": [
    "# 修正余弦相似度\n",
    "# 修正cosine 减去的是对item i打过分的每个user u，其打分的均值\n",
    "dataA = np.mat([[1,2,3],[2,3,4]])\n",
    "dataB = np.mat([[1,2,3],[3,3,3]])\n",
    "avgA = np.mean(dataA[:,0]) # 下标0表示正在打分的用户\n",
    "avgB = np.mean(dataB[:,0])\n",
    "def AdjustedCosine(dataA,dataB,avgA,avgB):\n",
    "    sumData = (dataA - avgA) * (dataB - avgB).T # 若列为向量则为 dataA.T * dataB\n",
    "    denom = np.linalg.norm(dataA - avgA) * np.linalg.norm(dataB - avgB)\n",
    "    return 0.5 + 0.5 * (sumData / denom)\n",
    "# 为第一个物品打分\n",
    "print(AdjustedCosine(dataA[0,:],dataB[0,:],avgA,avgB))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
